Import raw data:

Calibration curve:

# Import data
calibration_curve <- readxl::read_xlsx("../1_Data/OTA_patron_curve.xlsx")
colnames(calibration_curve) <- c("OTA concentration [ppb]", "Area under the curve (AUC)")

# Plot and linear model
plot(calibration_curve, main = "OTA's calibration curve", col = "blue", pch = 16)
model <- lm(calibration_curve$`Area under the curve (AUC)` ~ calibration_curve$`OTA concentration [ppb]`)
abline(model, col = "purple4")

# To obtain the r^2 value
m_summary <- summary(model)

cat("Calibration curve aquation: y =", coef(model)[2], "x +", coef(model)[1], "\n")
## Calibration curve aquation: y = 331472 x + -188315.3
cat("Determination Coefficient R^2:", m_summary$r.squared, "\n")
## Determination Coefficient R^2: 0.9979644
remove(m_summary)

Import targeted metabolomic data

# List with the sheet names and another with the save dataset name:
sheets <- c("Pn856 J17", "Pn856 J25", "Pn856 S17", "Pn856 S25",
            "Pn15 J17", "Pn15 J25", "Pn15 S17", "Pn15 S25",
            "Pn92 J17", "Pn92 J25", "Pn92 S17", "Pn92 S25")

saves <- c("J_856_17", "J_856_25", "S_856_17", "S_856_25",
           "J_15_17", "J_15_25", "S_15_17", "S_15_25",
           "J_92_17", "J_92_25", "S_92_17", "S_92_25")
for (i in 1:length(sheets)){
  tmp_data <- readxl::read_xlsx(path = "../1_Data/Targeted_metabolomic_data.xlsx", sheet = sheets[[i]])
  tmp_data <- as.data.frame(tmp_data) # Save as data frame instead as a tibble
  colnames(tmp_data) <- c("Vial_codification", "Code_name", "AUC_OTA") # Change column name
  row.names(tmp_data) <- tmp_data$Vial_codification # Set first column as row names
  tmp_data <- tmp_data[,c(2,3)] # Eliminate first column
  tmp_data$AUC_OTA[tmp_data$AUC_OTA < 6e3] <- 0 # Apply a condition to set values less than 6*10^3 to 0
  tmp_data$AUC_OTA[tmp_data$AUC_OTA > 1e8] <- NA # Apply a condition to set values greater than 1^8 to NA
  assign(saves[[i]], tmp_data)
}
remove(tmp_data); remove(i)

Data treatment:

Generate the new data object with the sample information per day

for (i in 1:length(saves)){
  df_name <- saves[[i]] # Obtain the name of the object that is going to be use
  tmp_df <- get(df_name) # Obtain the values of the object named as df_name
  
  # With this loop we obtain the OTA's ppb values from OTA's AUC values with the calibration curve equation.   We must multiplicate by 5 due to the dilution factor.
  for (j in 1:nrow(tmp_df)){
    tmp_df$ppb[j] <- ((tmp_df$AUC[j]-coef(model)[1])/coef(model)[2]*5)
    # Apply a condition to set values that have an AUC value = 0 as 0. We do this to prevent the value 0 of AUC from taking positive values when converted to ppb, which could lead to errors. In this case, since the calibration curve has a negative intercept term, converting an AUC of 0 to ppb would result in 2.840586837.
    tmp_df$ppb[tmp_df$ppb == ((0-coef(model)[1])/coef(model)[2]*5)] <- 0
    assign(saves[[i]], tmp_df)
  }
  
  # With the next loop we calculate the mean and SD, from ppb information, of each day for each dataset
  days <- data.frame(OTA_ppb_mean = numeric(), OTA_ppb_sd = numeric())
  for (m in seq(1, nrow(tmp_df), by = 3)) {
    if (m + 2 <= nrow(tmp_df)) {
      mean_day <- mean(tmp_df$ppb[m:(m + 2)], na.rm = TRUE)
      sd_day <- sd(tmp_df$ppb[m:(m + 2)], na.rm = TRUE)
      days <- rbind(days, data.frame(OTA_ppb_mean = mean_day, OTA_ppb_sd = sd_day))
    }
  }

  days <- cbind(Sampling_Day = c(1:12), days)
  assign(paste0("days_", saves[[i]]), days)
}
remove(days); remove(i); remove(j); remove(m); remove(mean_day);
remove(sd_day); remove(tmp_df); remove(df_name)

Histogram plot

library(tidyverse)
library(plotly)
library(htmltools)

generate_plot <- function(data, plot_title) {
  ggplot(data) +
    geom_bar(aes(x = Sampling_Day, y = OTA_ppb_mean), stat = "identity", fill = "forestgreen", alpha = 0.5) +
    geom_errorbar(aes(x = Sampling_Day, ymin = OTA_ppb_mean - OTA_ppb_sd, ymax = OTA_ppb_mean + OTA_ppb_sd),
                  width = 0.4, colour = "orange", alpha = 0.9, size = 1.5) +
    ggtitle(plot_title)
}

plots <- list()
for (i in 1:length(saves)) {
  tmp_df <- get(paste0("days_", saves[[i]]))
  plot_title <- (paste("Amount of OTA productive per day by", as.character(sheets[[i]])))
  p <- generate_plot(tmp_df, plot_title) + 
    theme(plot.title = element_text(hjust = 0.5, color="black", size=14, face="bold"))
  p <- ggplotly(p, tooltip ="all") 
  plots[[i]] <- p
}